# Load the passive-air (PDMS) phthalate measurements. The sheet name indicates
# the concentrations are reported in pg/m^3.
phaletes <- read_excel(
  "../data/New Home Study- SVOCs_University of Toronto_March2024.xlsx",
  sheet = "Passive Air - PDMS (pg.m-3)",
  range = "A5:N145"
)
## New names:
## • `` -> `...6`

# Give the key columns syntactic names so they can be used unquoted below.
colnames(phaletes)[which(names(phaletes) == "House ID")] <- "House_ID"
colnames(phaletes)[which(names(phaletes) == "Sample ID")] <- "Sample_ID"
colnames(phaletes)[which(names(phaletes) == "Period (month)")] <- "Period"

# Keep only the rows sampled at months 0, 3, 6, 9 and 12.
phaletes_detected <- phaletes[phaletes$Period %in% c(0, 3, 6, 9, 12), ]

# Convert one raw concentration column to numeric, substituting `fill_value`
# for entries reported as "<DL".
#
# x          Raw column as read from the sheet (character when it contains
#            "<DL", otherwise possibly already numeric).
# fill_value Number used in place of "<DL".
#
# Returns a double vector of the same length as `x`; missing entries stay NA.
# Only the entries that are not "<DL" are passed to as.numeric(), so a
# coercion warning now signals a genuinely unparseable cell.
replace_below_dl <- function(x, fill_value) {
  if (is.numeric(x)) {
    return(as.numeric(x))
  }
  raw <- as.character(x)
  below_dl <- !is.na(raw) & raw == "<DL"
  out <- rep(NA_real_, length(raw))
  out[below_dl] <- fill_value
  out[!below_dl] <- as.numeric(raw[!below_dl])
  out
}

# Substitute a compound-specific value for every "<DL" entry.
# NOTE(review): the fill values are presumably derived from each compound's
# detection limit -- confirm their origin.
# Fix: DnBP previously took its detected values from DPP instead of DnBP.
phaletes_filled <- phaletes_detected %>%
  mutate(
    DEP = replace_below_dl(DEP, 50),
    DPP = replace_below_dl(DPP, 101),
    DiBP = replace_below_dl(DiBP, 80),
    DnBP = replace_below_dl(DnBP, 103),
    BzBP = replace_below_dl(BzBP, 87),
    DEHP = replace_below_dl(DEHP, 75),
    DnOP = replace_below_dl(DnOP, 69),
    DiNP = replace_below_dl(DiNP, 102)
  ) %>%
  mutate(Period = as.numeric(as.character(Period))) %>%
  arrange(House_ID)
## The "NAs introduced by coercion" warnings previously recorded here came from
## calling as.numeric() on the "<DL" strings inside ifelse(); they are no longer
## expected. Any coercion warning that still appears points to another
## non-numeric entry in the sheet.
# Houses that were sampled in exactly five distinct periods; rows of all other
# houses are dropped. Grouping is applied per-verb, so the result is ungrouped.
complete_houses <- filter(
  phaletes_filled,
  n_distinct(Period) == 5,
  .by = House_ID
)
# Load the technician survey and the 0-12 month questionnaire exports.
tech_survey <- read_excel("../data/20231027_Technician Survey Data.xlsx")
que_survey <- read_excel("../data/20231027_0-12 Month Questionnaire Data.xlsx")

# House id plus the insulation indicator columns.
# (`fiberglass_ins_yn` was listed twice; the redundant entry is removed.)
tech_survey_data <- tech_survey %>%
  select(
    id,
    sprayfoam_ins_yn,
    fiberglass_ins_yn,
    Styrofoam_ins_yn,
    cellulose_ins_yn,
    unknown_ins_yn,
    other_ins_yn
  )

# House id, response date and the questionnaire items used further down:
# personal care products, new mattresses / upholstered / composite furniture,
# pets, smoking, paints and the low-VOC flag.
que_survey_data <- que_survey %>%
  select(
    id,
    date,
    nailpolish_yn,
    perfume_yn,
    hspray_yn,
    n_kmat_new,
    n_tmat_new,
    n_mat_cover_new,
    n_uph_chair_new,
    n_uph_sofa_new,
    n_uph_ottoman_new,
    n_uph_kids_new,
    n_uph_other_new,
    n_comp_dining_new,
    n_comp_bed_new,
    n_comp_dresser_new,
    n_comp_desk_new,
    n_comp_accent_new,
    dogcat_yn,
    cig_yn,
    cannabis_yn,
    paints_yn,
    low_VOC_home
  )
# Keep exactly one questionnaire row per `id`: the last one in file order.
# duplicated(fromLast = TRUE) flags every occurrence except the last, so the
# surviving rows stay in their original order. The previous
# reverse / distinct / arrange(row_number()) chain selected the same rows but
# left them reversed, because arrange(row_number()) does not reorder anything.
# NOTE(review): "last" means last row in the file, not latest `date` --
# confirm that this is the intended record.
que_survey_data_last <- que_survey_data %>%
  filter(!duplicated(id, fromLast = TRUE))

Notes on the candidate covariates:

- Insulation type: a house can have multiple insulation types. Is polystyrene the one that normally releases VOCs?
- Flooring type: a house can have multiple flooring types.
- Wall cover/material: I did not find this information in the survey.
- Use of personal care products (nail polish, perfume, hair spray, etc.; combine into one column): I combined them into one column, recorded as 1 if any one of them is present and 0 otherwise.
- New furniture (again, combine): not sure how to combine these.
- Pets (combine all kinds of pets: cat, dog, etc.).
- Smoking (keep nicotine and cannabis separate): none of the households smoke cigarettes, and only one uses cannabis.
- Cleaning (maybe look at the total number of cleaning days): there are multiple cleaning methods.
- Paint.

# Attach the technician-survey columns to every phthalate measurement.
# merge() with its default settings keeps only houses present in both tables.
# NOTE(review): the joins use phaletes_filled rather than complete_houses --
# confirm that houses with missing periods are meant to be included.
full_data_tech <- merge(
  phaletes_filled,
  tech_survey_data,
  by.x = 'House_ID',
  by.y = 'id'
)

# Same join against the de-duplicated questionnaire, then add one combined
# personal-care flag: 1 when nail polish, perfume or hair spray is reported,
# 0 otherwise.
full_data_survey <- merge(
  phaletes_filled,
  que_survey_data_last,
  by.x = 'House_ID',
  by.y = 'id'
) %>%
  mutate(
    care_product_usage = if_else(
      nailpolish_yn == 1 | perfume_yn == 1 | hspray_yn == 1,
      1,
      0
    )
  )
# One line plot per phthalate: concentration over the sampling months, one
# line per house, coloured by the Styrofoam insulation indicator.
phthalate_columns <- c(
  "DEP", "DPP", "DiBP", "DnBP", "BzBP", "DEHP", "DnOP", "DiNP"
)

for (compound in phthalate_columns) {
  styrofoam_plot <- ggplot(
    full_data_tech,
    aes(
      x = Period,
      y = .data[[compound]],
      group = as.factor(House_ID),
      color = as.factor(Styrofoam_ins_yn)
    )
  ) +
    geom_line() +
    labs(x = "Month", y = "Concentration", color = 'Styrofoam') +
    ggtitle(paste0(compound, " versus Month for Different Homes")) +
    theme_minimal()

  # ggplot objects are not auto-printed inside a loop, so draw explicitly.
  print(styrofoam_plot)
}

## There are very few data points for Styrofoam, but judging by the line plots alone it does seem to have some effect.

# One line plot per phthalate: concentration over the sampling months, one
# line per house, coloured by the combined personal-care-product flag.
phthalate_columns <- c(
  "DEP", "DPP", "DiBP", "DnBP", "BzBP", "DEHP", "DnOP", "DiNP"
)

for (compound in phthalate_columns) {
  care_product_plot <- ggplot(
    full_data_survey,
    aes(
      x = Period,
      y = .data[[compound]],
      group = as.factor(House_ID),
      color = as.factor(care_product_usage)
    )
  ) +
    geom_line() +
    labs(x = "Month", y = "Concentration", color = "Care Product Usage") +
    ggtitle(paste0(compound, " versus Month for Different Homes")) +
    theme_minimal()

  # ggplot objects are not auto-printed inside a loop, so draw explicitly.
  print(care_product_plot)
}

### Personal care product use has a significant effect.

# One line plot per phthalate: concentration over the sampling months, one
# line per house, coloured by the cannabis indicator.
phthalate_columns <- c(
  "DEP", "DPP", "DiBP", "DnBP", "BzBP", "DEHP", "DnOP", "DiNP"
)

for (compound in phthalate_columns) {
  cannabis_plot <- ggplot(
    full_data_survey,
    aes(
      x = Period,
      y = .data[[compound]],
      group = as.factor(House_ID),
      color = as.factor(cannabis_yn)
    )
  ) +
    geom_line() +
    labs(x = "Month", y = "Concentration", color = "cannabis_yn") +
    ggtitle(paste0(compound, " versus Month for Different Homes")) +
    theme_minimal()

  # ggplot objects are not auto-printed inside a loop, so draw explicitly.
  print(cannabis_plot)
}

# One line plot per phthalate: concentration over the sampling months, one
# line per house, coloured by the dog/cat ownership indicator.
phthalate_columns <- c(
  "DEP", "DPP", "DiBP", "DnBP", "BzBP", "DEHP", "DnOP", "DiNP"
)

for (compound in phthalate_columns) {
  pet_plot <- ggplot(
    full_data_survey,
    aes(
      x = Period,
      y = .data[[compound]],
      group = as.factor(House_ID),
      color = as.factor(dogcat_yn)
    )
  ) +
    geom_line() +
    labs(x = "Month", y = "Concentration", color = "pet_ownership") +
    ggtitle(paste0(compound, " versus Month for Different Homes")) +
    theme_minimal()

  # ggplot objects are not auto-printed inside a loop, so draw explicitly.
  print(pet_plot)
}

Households that own one or more pets tend to show an increasing trend.

# One line plot per phthalate: concentration over the sampling months, one
# line per house, coloured by the paints indicator.
phthalate_columns <- c(
  "DEP", "DPP", "DiBP", "DnBP", "BzBP", "DEHP", "DnOP", "DiNP"
)

for (compound in phthalate_columns) {
  paint_plot <- ggplot(
    full_data_survey,
    aes(
      x = Period,
      y = .data[[compound]],
      group = as.factor(House_ID),
      color = as.factor(paints_yn)
    )
  ) +
    geom_line() +
    labs(x = "Month", y = "Concentration", color = "painting") +
    ggtitle(paste0(compound, " versus Month for Different Homes")) +
    theme_minimal()

  # ggplot objects are not auto-printed inside a loop, so draw explicitly.
  print(paint_plot)
}